#!/usr/bin/Rscript
##########################################################################################
# Issue Ownership and Agenda Setting in the 2019 Swiss National Elections
##########################################################################################
# Description:
# Script for Descriptive Tables
##########################################################################################
# Content
##########################################################################################
# 1) Dependencies
# 2) Startup
# 3) Load Data
# 4) Transform Data
# 5) Descriptive Tables
##########################################################################################
# 1) Dependencies
##########################################################################################
library(dplyr)
library(tidyverse)
library(data.table)
library(readr)
library(lubridate)
library(ggplot2)
library(ggExtra)
library(ggeffects)
library(ggrepel)
library(ggforce)
library(igraph)
library(graphlayouts)
library(ggraph)
library(scales)
library(purrr)
library(magrittr)
library(cowplot)
library(sysfonts)
##########################################################################################
# 2) Startup
##########################################################################################
# Start from an empty global environment (attached packages are not affected).
rm(list = ls())

# - locate this script so that all relative paths resolve from its folder
args <- commandArgs()

# Rscript hands over the script location as `--file=<path>`.
scriptName <- args[startsWith(args, "--file=")]

if (length(scriptName) == 0) {
  # No `--file=` argument (e.g. an interactive RStudio session): ask the editor.
  scriptName <- rstudioapi::getSourceEditorContext()$path
} else {
  scriptName <- sub("^--file=", "", scriptName[1])
}

if (length(scriptName) != 1 || is.na(scriptName) || !nzchar(scriptName)) {
  stop("Could not determine the location of this script.", call. = FALSE)
}

# dirname() copes with "/" and "\\" separators and returns "." for a bare
# file name, a case in which manual substr()/strsplit() arithmetic yields NA.
pathName <- dirname(scriptName)

# - set directory
# Done before anything is read through a relative path, so that the theme file
# below is looked up relative to the script folder, like the data files.
setwd(pathName)
parent_path <- getwd()

# - load fonts used in plots
sysfonts::font_add_google("Montserrat", "Montserrat")
sysfonts::font_add_google("Roboto", "Roboto")

# - load ddl theme
suppressMessages(suppressWarnings(source('../ggplot_theme_ddl.R')))

# - pre-setup for figure 1
ddl_theme_2 <- theme(
  legend.position = "bottom",
  legend.title = element_blank(),
  strip.background = element_blank(),
  strip.text = element_text(color = "black"),
  axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0.0, size = 16, color = "black"),
  axis.text.y = element_text(hjust = 0, size = 16, color = "black"),
  axis.ticks.y = element_blank(),
  strip.text.x = element_text(size = 16, color = "black"),
  axis.title = element_text(size = 16, color = "black"),
  legend.text = element_text(size = 16, color = "black"),
  plot.title = element_blank(),
  plot.margin = unit(c(.5, .5, .5, .5), "cm"),
  legend.key.size = unit(1.5, "line"),
  legend.key = element_blank(),
  axis.line.x = element_line(color = "black", size = .5),
  axis.line.y = element_line(color = "black", size = .5),
  panel.spacing.y = unit(2, "lines"),
  panel.spacing.x = unit(.8, "lines")
)
##########################################################################################
# 3) Load Data
##########################################################################################
# Press releases, newspaper articles (SMD) and tweets, read from the serialized
# R objects in ../Data (paths are relative to the working directory).
press  <- readr::read_rds("../Data/pressreleases_2018-2019.RDS")
smd    <- readr::read_rds("../Data/smd_minified_2018-2019.RDS")
tweets <- readr::read_rds("../Data/Tweets_minified_2018_2019_curated_sentiment_class.RDS")
##########################################################################################
# 4) Transform Data
##########################################################################################
# Restrict all three sources to the election-year window. Both bounds are
# exclusive: 2019-01-01 and 2019-10-21 themselves are dropped.

# Press releases: additionally drop the actors listed below
# (presumably interest groups rather than parties -- verify against the data).
press <- press %>%
  dplyr::filter(
    pubDateTime > as.Date("2019-01-01"),
    pubDateTime < as.Date("2019-10-21"),
    !(`Akteur` %in% c(
      "Travail Suisse",
      "Schweizer Bauernverband",
      "Schweizerischer Gewerkschaftsbund",
      "Verband Schweizerischer Elektrizitätsunternehmen",
      "Sante Suisse",
      "Schweizerischer Baumeisterverband",
      "Economie Suisse",
      "Schweizerischer Gewerbeverband",
      "Schweizerischer Bankiervereinigung"
    ))
  )

# Newspaper articles: time window only.
smd <- smd %>%
  dplyr::filter(
    pubDateTime > as.Date("2019-01-01"),
    pubDateTime < as.Date("2019-10-21")
  )

# Tweets: time window, no retweets, no accounts typed "Organisation".
tweets <- tweets %>%
  dplyr::filter(
    Datum > as.Date("2019-01-01"),
    Datum < as.Date("2019-10-21"),
    Is_retweet == FALSE,
    `Akteur.Typ` != "Organisation"
  )

# Keep tweets whose Party is one of the raw labels below. Rows with the string
# "NA" or a missing Party are kept as well.
tweets <- dplyr::filter(
  tweets,
  Party %in% c(
    "grÜne (basels starke alternative)",
    "grÜnliberale partei",
    "sozialdemokratische partei der schweiz",
    "alternative - die grÜnen zug",
    "schweizerische volkspartei",
    "fdp.die liberalen",
    "christlichsoziale volkspartei oberwallis",
    "grÜne partei der schweiz",
    "christlich-soziale partei",
    "christdemokratische volkspartei der schweiz",
    "christlichdemokratische volkspartei der schweiz",
    "bÜrgerlich-demokratische partei schweiz",
    "NA",
    NA
  )
)

# Collapse the raw labels (including cantonal sections and spelling variants)
# onto short party labels; anything not listed keeps its original value.
tweets <- dplyr::mutate(
  tweets,
  Party = case_when(
    Party == "schweizerische volkspartei" ~ "SVP",
    Party == "sozialdemokratische partei der schweiz" ~ "SP",
    Party == "fdp.die liberalen" ~ "FDP",
    Party == "grÜnliberale partei" ~ "GLP",
    Party == "bÜrgerlich-demokratische partei schweiz" ~ "BDP",
    Party %in% c(
      "christdemokratische volkspartei der schweiz",
      "christlichdemokratische volkspartei der schweiz",
      "christlich-soziale partei",
      "christlichsoziale volkspartei oberwallis"
    ) ~ "CVP",
    Party %in% c(
      "grÜne (basels starke alternative)",
      "grÜne partei der schweiz",
      "alternative - die grÜnen zug"
    ) ~ "Grüne",
    TRUE ~ Party
  )
)

# Sanity check: print the date range left in each source after filtering.
range(press$pubDateTime)
range(smd$pubDateTime)
range(tweets$Datum)


## 4.1) Harmonise outlet names in so_txt: online editions, language editions
## and misspelled entries of the smd database are mapped onto one label per
## outlet; names that are not listed are left as they are.
smd <- dplyr::mutate(
  smd,
  so_txt = case_when(
    # online / Newsnet variants
    so_txt == "Newsnet / 24 heures" ~ "24 heures",
    so_txt == "Newsnet / Basler Zeitung" ~ "Basler Zeitung",
    so_txt == "Newsnet / Berner Zeitung" ~ "Berner Zeitung",
    so_txt == "Newsnet / Der Bund" ~ "Der Bund",
    so_txt == "Newsnet / Le Matin" ~ "Le Matin",
    so_txt == "Newsnet / Tages-Anzeiger" ~ "Tages-Anzeiger",
    so_txt %in% c("Newsnet / Tribune de Genève", "Tribune de Genève") ~ "Tribune de Genève",
    so_txt == "Handelszeitung online" ~ "Handelszeitung",
    so_txt == "Finanz und Wirtschaft Online" ~ "Finanz und Wirtschaft",
    so_txt == "SWI swissinfo.ch" ~ "swissinfo.ch",
    # language editions merged into one outlet
    so_txt %in% c("20 minuten online", "20 minutes", "20 minuti") ~ "20 minuten",
    # NOTE(review): rts.ch is counted under srf.ch here -- confirm this merge is intended.
    so_txt %in% c("rts.ch", "RTS.ch") ~ "srf.ch",
    so_txt %in% c("Migros-Magazin", "Migros Magazine") ~ "Migros-Magazin",
    so_txt %in% c("Cooperazione", "Coopzeitung", "Coopération") ~ "Coopzeitung",
    # spelling variants and combined titles
    so_txt %in% c("Anzeigen von Uster", "Anzegier von Uster") ~ "Anzeiger von Uster",
    so_txt == "L'Agefi" ~ "Agefi",
    so_txt %in% c("Aargauer Zeitung", "Aargauer Zeitung / MLZ") ~ "Aargauer Zeitung",
    so_txt %in% c("L'Express / L'Impartial", "Arcinfo") ~ "Arcinfo",
    TRUE ~ so_txt
  )
)

## Some items carry two comma-separated classes: the political system
## (e.g. election / poll / party) plus a substantive topic. Knowing the
## substantive topic is of greater interest, so only one label is kept.
## The pattern drops everything up to and including the last comma, i.e. the
## label after the last comma survives (presumably the substantive topic --
## verify against the classifier output).
keep_last_class <- function(x) {
  gsub(".*,", "", x)
}

smd$selectsclass <- keep_last_class(smd$selectsclass)
press$selectsclass <- keep_last_class(press$selectsclass)
tweets$selectsclass <- keep_last_class(tweets$selectsclass)

# Print the harmonised outlet names for a visual check.
sort(unique(smd$so_txt))

# Add time variables to each source: the date column is parsed with ymd(),
# then week, month and year are extracted and pasted into the "<week>-<year>"
# and "<month>-<year>" keys.
smd <- smd %>%
  mutate(
    pubDateTime = lubridate::ymd(pubDateTime),
    weekd = lubridate::week(pubDateTime),
    monthd = lubridate::month(pubDateTime),
    yeard = lubridate::year(pubDateTime),
    weekyear = paste0(weekd, "-", yeard),
    monthyear = paste0(monthd, "-", yeard)
  )

press <- press %>%
  mutate(
    pubDateTime = lubridate::ymd(pubDateTime),
    weekd = lubridate::week(pubDateTime),
    monthd = lubridate::month(pubDateTime),
    yeard = lubridate::year(pubDateTime),
    weekyear = paste0(weekd, "-", yeard),
    monthyear = paste0(monthd, "-", yeard)
  )

# Tweets keep their date in `Datum` instead of `pubDateTime`.
tweets <- tweets %>%
  mutate(
    Datum = lubridate::ymd(Datum),
    weekd = lubridate::week(Datum),
    monthd = lubridate::month(Datum),
    yeard = lubridate::year(Datum),
    weekyear = paste0(weekd, "-", yeard),
    monthyear = paste0(monthd, "-", yeard)
  )
##########################################################################################
# 5) Descriptive Tables 
##########################################################################################
# Average number of items per day and topic.
# Step 1 counts items per topic and day, step 2 averages those daily counts
# within each topic.

# SMD: the per-topic averages are additionally summed into a single figure.
smd_daily_counts <- summarise(group_by(smd, selectsclass, pubDateTime), n = n())
smd_topic_means <- summarise(smd_daily_counts, mean = mean(n))
print(summarise(smd_topic_means, n = sum(mean)))

# Tweets: the per-topic averages are printed as they are.
tweet_daily_counts <- summarise(group_by(tweets, selectsclass, Datum), n = n())
print(summarise(tweet_daily_counts, mean = mean(n)))


# Total Numbers Tweets (accounts of the listed non-person actor types)
tweets_org <- tweets %>%
  filter(Akteur.Typ %in% c("Media", "Departement", "Party", "Administration", "Institute"))
dim(tweets_org)
length(unique(tweets_org$Screen_name))

# Total Numbers SMD
dim(smd)
length(unique(smd$so_txt))

# Total Numbers Pressreleases
dim(press)
length(unique(press$Akteur))

# Total number of Panel Participants
# `panel` is not loaded anywhere in this script, so an unguarded dim(panel)
# stops the run before the appendix table is written. The totals are only
# reported when a `panel` data frame is actually available.
if (exists("panel") && is.data.frame(panel)) {
  # explicit print(): values inside a braced block are not auto-printed
  print(dim(panel))
  print(length(unique(panel$id)))
} else {
  message("Object `panel` is not loaded in this script; skipping the panel totals.")
}

# Tweets by Parties
tweets2 <- tweets %>% filter(Party %in% c("GLP", "SVP", "FDP", "CVP", "SP", "Grüne"))
dim(filter(tweets2, Akteur.Typ == "Party"))
# Tweets by Politicians / Candidates
dim(filter(tweets2, Akteur.Typ == "Person"))

# The four topics of interest. The EU topic used to be spelled "EU_Europa" for
# press releases / SMD and "EU_Europe" for tweets; both spellings are accepted
# for every source so that no source silently loses its EU items.
# NOTE(review): confirm which spelling the classifier emits and drop the other.
focus_topics <- c(
  "GenderIssues_Discrimination",
  "Environment_Energy",
  "Immigration_Asylum",
  "EU_Europa",
  "EU_Europe"
)

# Press releases with one of the four Topics:
dim(filter(press, selectsclass %in% focus_topics))

# SMD Articles with one of the four Topics:
dim(filter(smd, selectsclass %in% focus_topics))

# Tweets with one of the four Topics:
dim(filter(tweets, selectsclass %in% focus_topics))

# SMD Table Appendix (Frequencies by Paper)
# Articles are first counted per outlet and day; per outlet the table then
# reports the total number of articles and the mean / min / max per day.
smd_stat <- smd %>%
  group_by(so_txt, pubDateTime) %>%
  summarise(n = n()) %>%
  group_by(so_txt) %>%
  summarise(Articles = sum(n), AVG = mean(n), min = min(n), max = max(n))

# Replace the "&" in this outlet name with "/" before the table is exported.
smd_stat$so_txt <- ifelse(
  smd_stat$so_txt == "Werdenberger & Obertoggenburger",
  "Werdenberger / Obertoggenburger",
  smd_stat$so_txt
)

# Round every numeric column to two decimals.
smd_stat <- smd_stat %>% mutate_if(is.numeric, round, 2)

# Make sure the output folder exists; writing the table fails otherwise.
dir.create("../tables", showWarnings = FALSE, recursive = TRUE)

# Export the table as LaTeX and HTML. TRUE/FALSE are spelled out because the
# shorthands T/F are ordinary variables that can be reassigned.
stargazer::stargazer(smd_stat, summary = FALSE, type = "latex",
                     out = "../tables/table_A01.tex", header = FALSE, digits = 2)
stargazer::stargazer(smd_stat, summary = FALSE, type = "html",
                     out = "../tables/table_A01.html", header = FALSE, digits = 2)
##########################################################################################